Setup

# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] 500
## 
## $resolution
## [1] 0.6
## 
## $resultsPath
## [1] "./Results"
## 
## $nCores
## [1] 2
## 
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
# load(file.path(resultsPath,"scRNAseq_results.RData")) 

library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr) 
library(data.table) 
library(readxl) 
library(reshape2)
library(ggrepel)
library(plotly)

library(GeneOverlap) # BiocManager::install("GeneOverlap") 
library(enrichR) #BiocManager::install("enrichR")

## Upgrade to alpha (development) version of Monocle, as this version has been optimized for large datasets
if("DDRTree" %in% rownames(installed.packages())==F){
  devtools::install_github("cole-trapnell-lab/DDRTree", ref="simple-ppt-like")
}
if("graph" %in% rownames(installed.packages())==F){
 devtools::install_github("cole-trapnell-lab/L1-graph") 
}
if("reticulate" %in% rownames(installed.packages())==F){
 install.packages("reticulate")
}
if("flexclust" %in% rownames(installed.packages())==F){
 install.packages("flexclust")
}
if("mcclust" %in% rownames(installed.packages())==F){
 install.packages("mcclust")
}
library(flexclust)
library(mcclust)
library(reticulate)
if(py_module_available("umap-learn")==F){
  reticulate::py_install('umap-learn')# , pip = T, pip_ignore_installed = T # Ensure the latest version of UMAP is installed 
}
## virtualenv: ~/.virtualenvs/r-reticulate 
## Installing packages ...
## 
## Installation complete.
if(py_module_available("louvain")==F){
  reticulate::py_install("louvain")
  system("pip install louvain --user")
}
if("monocle" %in% rownames(installed.packages())==F){
  devtools::install_github("cole-trapnell-lab/monocle-release", ref="monocle3_alpha") 
}
if("ggrastr" %in% rownames(installed.packages())==F){
  devtools::install_github("VPetukhov/ggrastr")
}

library(monocle) # BiocManager::install("monocle") 
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
 
#  
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
  allGenes <- F
}else{allGenes <- T}
allGenes
## [1] FALSE
sessioninfo::session_info()
## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.5.1 (2018-07-02)
##  os       macOS  10.14.2              
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2019-03-01                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package          * version    date       lib
##  acepack            1.4.1      2016-10-29 [2]
##  AnnotationDbi    * 1.44.0     2018-10-30 [2]
##  ape                5.2        2018-09-24 [2]
##  assertthat         0.2.0      2017-04-11 [2]
##  backports          1.1.3      2018-12-14 [2]
##  base64enc          0.1-3      2015-07-28 [2]
##  bibtex             0.4.2      2017-06-30 [2]
##  Biobase          * 2.42.0     2018-10-30 [2]
##  BiocGenerics     * 0.28.0     2018-10-30 [2]
##  BiocParallel     * 1.16.6     2019-02-10 [2]
##  bit                1.1-14     2018-05-29 [2]
##  bit64              0.9-7      2017-05-08 [2]
##  bitops             1.0-6      2013-08-17 [2]
##  blob               1.1.1      2018-03-25 [2]
##  boot               1.3-20     2017-08-06 [2]
##  caTools            1.17.1.1   2018-07-20 [2]
##  cellranger         1.1.0      2016-07-27 [2]
##  checkmate          1.9.1      2019-01-15 [2]
##  class              7.3-15     2019-01-01 [2]
##  classInt           0.3-1      2018-12-18 [1]
##  cli                1.0.1      2018-09-25 [2]
##  cluster            2.0.7-1    2018-04-13 [2]
##  coda               0.19-2     2018-10-08 [2]
##  codetools          0.2-16     2018-12-24 [2]
##  colorspace         1.4-0      2019-01-13 [2]
##  cowplot          * 0.9.4      2019-01-08 [2]
##  crayon             1.3.4      2017-09-16 [2]
##  crosstalk          1.0.0      2016-12-21 [2]
##  data.table       * 1.12.0     2019-01-13 [2]
##  DBI                1.0.0      2018-05-02 [2]
##  DDRTree          * 0.1.5      2019-02-22 [1]
##  DelayedArray     * 0.8.0      2018-10-30 [2]
##  deldir             0.1-16     2019-01-04 [1]
##  densityClust       0.3        2017-10-24 [2]
##  DEoptimR           1.0-8      2016-11-19 [2]
##  digest             0.6.18     2018-10-10 [2]
##  diptest            0.75-7     2016-12-05 [2]
##  docopt             0.6.1      2018-10-11 [2]
##  doParallel         1.0.14     2018-09-24 [2]
##  doSNOW             1.0.16     2017-12-13 [2]
##  dplyr            * 0.8.0.1    2019-02-15 [2]
##  dtw                1.20-1     2018-05-18 [2]
##  e1071              1.7-0.1    2019-01-21 [1]
##  enrichR          * 1.0        2017-04-02 [2]
##  evaluate           0.13       2019-02-12 [2]
##  expm               0.999-3    2018-09-22 [2]
##  fastICA            1.2-1      2017-06-12 [2]
##  fitdistrplus       1.0-14     2019-01-23 [2]
##  flexclust        * 1.4-0      2018-09-24 [1]
##  flexmix            2.3-15     2019-02-18 [2]
##  FNN                1.1.3      2019-02-15 [2]
##  foreach            1.4.4      2017-12-12 [2]
##  foreign            0.8-71     2018-07-20 [2]
##  Formula            1.2-3      2018-05-03 [2]
##  fpc                2.1-11.1   2018-07-20 [2]
##  future             1.11.1.1   2019-01-26 [2]
##  garnett          * 0.1.3      2019-02-15 [2]
##  gbRd               0.4-11     2012-10-01 [2]
##  gdata              2.18.0     2017-06-06 [2]
##  GeneOverlap      * 1.18.0     2018-10-30 [1]
##  ggplot2          * 3.1.0      2018-10-25 [2]
##  ggrepel          * 0.8.0      2018-05-09 [2]
##  ggridges           0.5.1      2018-09-27 [2]
##  glmnet             2.0-16     2018-04-02 [2]
##  globals            0.12.4     2018-10-11 [2]
##  glue               1.3.0      2018-07-17 [2]
##  gmodels            2.18.1     2018-06-25 [1]
##  gplots             3.0.1.1    2019-01-27 [2]
##  gridExtra          2.3        2017-09-09 [2]
##  gtable             0.2.0      2016-02-26 [2]
##  gtools             3.8.1      2018-06-26 [2]
##  hdf5r              1.0.1      2018-10-07 [2]
##  Hmisc              4.2-0      2019-01-26 [2]
##  HSMMSingleCell     1.2.0      2018-11-01 [2]
##  htmlTable          1.13.1     2019-01-07 [2]
##  htmltools          0.3.6      2017-04-28 [2]
##  htmlwidgets        1.3        2018-09-30 [2]
##  httpuv             1.4.5.1    2018-12-18 [2]
##  httr               1.4.0      2018-12-11 [2]
##  ica                1.0-2      2018-05-24 [2]
##  igraph           * 1.2.4      2019-02-13 [2]
##  IRanges          * 2.16.0     2018-10-30 [2]
##  irlba            * 2.3.3      2019-02-05 [2]
##  iterators          1.0.10     2018-07-13 [2]
##  jsonlite           1.6        2018-12-07 [2]
##  kernlab            0.9-27     2018-08-10 [2]
##  KernSmooth         2.23-15    2015-06-29 [2]
##  knitr              1.21       2018-12-10 [2]
##  L1Graph          * 0.1.1      2019-02-22 [1]
##  lars               1.2        2013-04-24 [2]
##  later              0.8.0      2019-02-11 [2]
##  lattice          * 0.20-38    2018-11-04 [2]
##  latticeExtra       0.6-28     2016-02-09 [2]
##  lazyeval           0.2.1      2017-10-29 [2]
##  LearnBayes         2.15.1     2018-03-18 [1]
##  limma              3.38.3     2018-12-02 [2]
##  listenv            0.7.0      2018-01-21 [2]
##  lmtest             0.9-36     2018-04-04 [2]
##  lpSolve          * 5.6.13     2015-09-19 [2]
##  lpSolveAPI       * 5.5.2.0-17 2016-01-13 [1]
##  lsei               1.2-0      2017-10-23 [2]
##  magrittr           1.5        2014-11-22 [2]
##  manipulateWidget   0.10.0     2018-06-11 [2]
##  MASS               7.3-51.1   2018-11-01 [2]
##  Matrix           * 1.2-15     2018-11-01 [2]
##  matrixStats      * 0.54.0     2018-07-23 [2]
##  mcclust          * 1.0        2012-07-23 [1]
##  mclust             5.4.2      2018-11-17 [2]
##  memoise            1.1.0      2017-04-21 [2]
##  metap              1.1        2019-02-06 [2]
##  mime               0.6        2018-10-05 [2]
##  miniUI             0.1.1.1    2018-05-18 [2]
##  mixtools           1.1.0      2017-03-10 [2]
##  modeltools       * 0.2-22     2018-07-16 [2]
##  monocle          * 2.99.3     2019-02-22 [1]
##  munsell            0.5.0      2018-06-12 [2]
##  mvtnorm            1.0-8      2018-05-31 [2]
##  nlme               3.1-137    2018-04-07 [2]
##  nnet               7.3-12     2016-02-02 [2]
##  npsurv             0.4-0      2017-10-14 [2]
##  org.Hs.eg.db     * 3.7.0      2019-02-15 [2]
##  pbapply            1.4-0      2019-02-05 [2]
##  pbmcapply          1.3.1      2019-01-14 [1]
##  pheatmap           1.0.12     2019-01-04 [2]
##  pillar             1.3.1      2018-12-15 [2]
##  pkgconfig          2.0.2      2018-08-16 [2]
##  plotly           * 4.8.0      2018-07-20 [2]
##  plyr               1.8.4      2016-06-08 [2]
##  png                0.1-7      2013-12-03 [2]
##  prabclus           2.2-7      2019-01-17 [2]
##  promises           1.0.1      2018-04-13 [2]
##  proxy              0.4-22     2018-04-08 [2]
##  purrr              0.3.0      2019-01-27 [2]
##  qlcMatrix          0.9.7      2018-04-20 [2]
##  R.methodsS3        1.7.1      2016-02-16 [2]
##  R.oo               1.22.0     2018-04-22 [2]
##  R.utils            2.8.0      2019-02-14 [2]
##  R6                 2.4.0      2019-02-14 [2]
##  RANN               2.6.1      2019-01-08 [2]
##  RColorBrewer       1.1-2      2014-12-07 [2]
##  Rcpp               1.0.0      2018-11-07 [2]
##  Rdpack             0.10-1     2018-10-04 [2]
##  readxl           * 1.3.0      2019-02-15 [2]
##  reshape2         * 1.4.3      2017-12-11 [2]
##  reticulate       * 1.10       2018-08-05 [2]
##  rgl                0.99.16    2018-03-28 [2]
##  rjson              0.2.20     2018-06-08 [2]
##  rlang              0.3.1      2019-01-08 [2]
##  rmarkdown          1.11       2018-12-08 [2]
##  robustbase         0.93-3     2018-09-21 [2]
##  ROCR               1.0-7      2015-03-26 [2]
##  rpart              4.1-13     2018-02-23 [2]
##  RSQLite            2.1.1      2018-05-06 [2]
##  rstudioapi         0.9.0      2019-01-09 [2]
##  Rtsne              0.15       2018-11-10 [2]
##  S4Vectors        * 0.20.1     2018-11-09 [2]
##  scales             1.0.0      2018-08-09 [2]
##  SDMTools           1.1-221    2014-08-05 [2]
##  segmented          0.5-3.0    2017-11-30 [2]
##  sessioninfo        1.1.1      2018-11-05 [2]
##  Seurat           * 2.3.4      2018-07-17 [2]
##  sf                 0.7-3      2019-02-21 [1]
##  shiny              1.2.0      2018-11-02 [2]
##  slam               0.1-44     2018-12-21 [2]
##  snow               0.4-3      2018-09-14 [2]
##  sp                 1.3-1      2018-06-05 [1]
##  sparsesvd          0.1-4      2018-02-15 [2]
##  spData             0.3.0      2019-01-07 [1]
##  spdep              1.0-2      2019-02-13 [1]
##  stringi            1.3.1      2019-02-13 [2]
##  stringr            1.4.0      2019-02-10 [2]
##  survival           2.43-3     2018-11-26 [2]
##  tibble             2.0.1      2019-01-12 [2]
##  tidyr              0.8.2      2018-10-28 [2]
##  tidyselect         0.2.5      2018-10-11 [2]
##  trimcluster        0.1-2.1    2018-07-20 [2]
##  tsne               0.1-3      2016-07-15 [2]
##  units              0.6-2      2018-12-05 [1]
##  VGAM               1.1-1      2019-02-18 [1]
##  viridis            0.5.1      2018-03-29 [2]
##  viridisLite        0.3.0      2018-02-01 [2]
##  webshot            0.5.1      2018-09-28 [2]
##  withr              2.1.2      2018-03-15 [2]
##  xfun               0.5        2019-02-20 [1]
##  xtable             1.8-3      2018-08-29 [2]
##  yaml               2.2.0      2018-07-25 [2]
##  zoo                1.8-4      2018-09-19 [2]
##  source                                            
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  Bioconductor                                      
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  Github (cole-trapnell-lab/DDRTree@43f3dc3)        
##  Bioconductor                                      
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Github (cole-trapnell-lab/garnett@d04a89b)        
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  Github (cran/ggplot2@13ca86f)                     
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  Github (cole-trapnell-lab/L1-graph@c2a3f26)       
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  Github (cole-trapnell-lab/monocle-release@4d37597)
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  Bioconductor                                      
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.1)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.2)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
##  CRAN (R 3.5.0)                                    
## 
## [1] /Users/schilder/Library/R/3.5/library
## [2] /Library/Frameworks/R.framework/Versions/3.5/Resources/library

Monocle

Convert and Normalize

  • Convert from Seurat object to CDS object and re-normalize the data from scratch (importCDS currently only converts the raw.data, not the scale.data).
  • Alternatively, you can do your normalization in Seurat and then construct a new CDS object manually with the scale.data from the Seurat object.
# Import Seurat obj
load(file.path(root, "Data/seurat_object_add_HTO_ids.Rdata"))
DAT <- seurat.obj  
rm(seurat.obj)
# Add metadata
metadata <- read.table(file.path(root,"Data/meta.data4.tsv"))
DAT <- AddMetaData(object = DAT, metadata = metadata)  
protDAT <- subsetBiotypes(DAT, subsetGenes = "protein_coding")
## Subsetting genes: protein_coding 
## 14827 / 24914 genes are protein_coding
protDAT <- remove_nonmatched_metadata(protDAT, subsetCells = F)
protDAT <- FindVariableGenes(object = protDAT, mean.function = ExpMean, 
                         dispersion.function = LogVMR, 
                         selection.method ="dispersion", do.plot = T, 
                         top.genes = 2000)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'

# Pass TRUE if you want to see progress output on some of Monocle 3's operations
DelayedArray:::set_verbose_block_processing(TRUE)
## [1] FALSE
# Passing a higher value will make some computations faster but use more memory. Adjust with caution!
options(DelayedArray.block.size=1000e6)

## Construct CDS object manually
# mDAT <- newCellDataSet(cellData = DAT@scale.data,
#                        featureData = AnnotatedDataFrame( data.frame(gene_short_name=row.names(DAT@scale.data),
#                                                                     row.names = row.names(DAT@scale.data))
#                          ),
#                        phenoData = AnnotatedDataFrame(DAT@meta.data))
# mDAT <- estimateSizeFactors(mDAT) #DESeq2?
# mDAT <- preprocessCDS(mDAT, method = "PCA", num_dim = 10, norm_method = "none")


## Construct CDS object automaticaly
### NOTE!: importCDS takes only the raw.data (not scale.data)
mDAT <- monocle::importCDS(protDAT,  import_all = T)
mDAT <- estimateSizeFactors(mDAT) #DESeq2?
mDAT <- estimateDispersions(mDAT)
## Processing block 1/23 ... OK
## Processing block 2/23 ... OK
## Processing block 3/23 ... OK
## Processing block 4/23 ... OK
## Processing block 5/23 ... OK
## Processing block 6/23 ... OK
## Processing block 7/23 ... OK
## Processing block 8/23 ... OK
## Processing block 9/23 ... OK
## Processing block 10/23 ... OK
## Processing block 11/23 ... OK
## Processing block 12/23 ... OK
## Processing block 13/23 ... OK
## Processing block 14/23 ... OK
## Processing block 15/23 ... OK
## Processing block 16/23 ... OK
## Processing block 17/23 ... OK
## Processing block 18/23 ... OK
## Processing block 19/23 ... OK
## Processing block 20/23 ... OK
## Processing block 21/23 ... OK
## Processing block 22/23 ... OK
## Processing block 23/23 ... OK
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm stopped at boundary value
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in parametricDispersionFit(disp_table, verbose): Dispersion fit did
## not converge.
## Removing 203 outliers
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm stopped at boundary value
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in parametricDispersionFit(disp_table[row.names(disp_table) %in% :
## Dispersion fit did not converge.
# Regressing out ID will also regress out mutation status 
mDAT <- preprocessCDS(mDAT, num_dim = 20, residualModelFormulaStr = "~ nUMI + percent.mito") 
## Warning in if (method == "PCA") {: the condition has length > 1 and only
## the first element will be used
## Processing block 1/4 ... OK
## Processing block 2/4 ... OK
## Processing block 3/4 ... OK
## Processing block 4/4 ... OK

Dimensionality Reduction

# !Important! (allows replicability)
set.seed(1) # Monocle sets their seed to 2016 by default (Seurat sets it to 1)
# Options: c("UMAP", "tSNE", "DDRTree", "ICA", "none")

## UMAP
# monocle:::UMAP( )
mDAT <- reduceDimension(mDAT, reduction_method = 'UMAP', 
                        max_components = 3, 
                        metric="cosine",
                        n_threads =  parallel::detectCores())
## t-SNE
# mDAT <- reduceDimension(mDAT, reduction_method = 'tSNE', 
#                         perplexity = 30,
#                         scaling = F,
#                         num_threads = parallel::detectCores()) 

pretty_colors <- function(mDAT, var="Cluster", palette="custom"){
  unique_var <- unique(as.character(pData(mDAT)[var][,1]))
  if(palette=="custom"){
    col_vector_origin <- c("mediumorchid","deepskyblue","limegreen","steelblue",
                           "hotpink","turquoise", "blueviolet","mediumvioletred",
                           
                           "#db83da","#53c35d","#a546bb","#718fe8","#a469e6",
                         "#babb3d","#4f66dc","#e68821","#83b837","#d6ac3e",
                         "#7957b4","#468e36","#d347ae","#5dbf8c","#e53e76",
                         "#42c9b8","#dd454a","#3bbac6","#d5542c","#59aadc",
                         "#cf8b36","#4a61b0","#8b8927","#a24e99","#9cb36a",
                         "#ca3e87","#36815b","#b23c4e","#5c702c","#b79add",
                         "#a55620","#5076af","#e38f67","#85609c","#caa569",
                         "#9b466c","#88692c","#dd81a9","#a35545","#e08083",
                         "#17becf","#9edae5")
  } else{col_vector_origin <- RColorBrewer::brewer.pal(length(unique_var),palette)} 
  # barplot(1:length(col_vector_origin) , col=col_vector_origin) 
  col_vector <- col_vector_origin[1:length(unique_var)]
  names(col_vector) <- unique_var
  return(col_vector)
} 

Clustering

  • “Here res represents the resolution parameter (range from 0-1) for the louvain clustering. Values between 0 and 1e-2 are good, bigger values give you more clusters. Default is set to be 1e-4. louvain_iter represents the number of iterations used for Louvain clustering. The clustering result gives the largest modularity score that will be used as the final clustering result. The default is 1.”
plotDR <- function(resDAT, metavar="Cluster", title=""){ 
  options(repr.plot.width = 11)
  options(repr.plot.height = 8)
  col_vector <- pretty_colors(resDAT, var=metavar) 
  p <- plot_cell_clusters(resDAT,
                     color_by = metavar,
                     cell_size = 0.8,
                     show_group_id = T)  + 
    scale_color_manual(values = col_vector) +
    theme(legend.text=element_text(size=6)) +
    theme(legend.position="right")+ 
    labs(title = paste(title)) 
  print(p)
}

test_hyperparameters <- function(mDAT, resolutions=seq(0, 1e-4,length.out=6),
                                 Ks=seq(10,60,length.out=6), 
                                 iter_var="k"){
  if(iter_var=="resolution"){
    for(res in resolutions){
      res_title <- paste("Resolution =",res)
      cat("\n")
      cat("###",res_title,"\n")
      try({ 
       resDAT <- clusterCells(mDAT, res=res,# k=43,
                          method = "louvain",# densityPeak
                          louvain_iter = 1,
                          verbose = F, 
                          clustering_genes = clustering_genes,
                          cores = parallel::detectCores())
      plotDR(resDAT, title=res_title)  
      }) 
      cat("\n")
    }  
  }else{
    for(k in Ks){
      k_title <- paste("K =",k)
      cat("\n")
      cat("###",k_title,"\n")
      try({ 
       kDAT <- clusterCells(mDAT, res=8.888889e-05, k=k,
                          method = "louvain",# densityPeak
                          louvain_iter = 1,
                          verbose = F, 
                          clustering_genes = clustering_genes,
                          cores = parallel::detectCores())
      plotDR(kDAT, title=k_title)  
      })
      cat("\n")
    }   
  } 
}
test_hyperparameters(mDAT, iter_var="resolution")

Resolution = 0

## Loading required package: ggrastr

Resolution = 2e-05

Resolution = 4e-05

Resolution = 6e-05

Resolution = 8e-05

Resolution = 1e-04

# test_hyperparameters(mDAT, iter_var="k")

# Select final resolution
## Use ONLY the 1000 most variable genes to do clustering (otherwise there's too much noise)
clustering_genes <- intersect(mDAT@featureData@data$gene_short_name, protDAT@var.genes)[1:1000]
louvain_res <- 1e-04 # 1e-04 is the default
mDAT <- clusterCells(mDAT, res=louvain_res,
                        method = "louvain",
                        louvain_iter = 1, 
                        clustering_genes = clustering_genes,
                        verbose = T, cores = parallel::detectCores())
## Run kNN based graph clustering starts:
##   -Input data of 19144 rows and 3 columns
##   -k is set to 20

Finding nearest neighbors…DONE ~ 0.124 s Compute jaccard coefficient between nearest-neighbor sets …DONE ~ 0.008 s Build undirected graph from the weighted links …DONE ~ 0.121 s Run louvain clustering on the graph … Running louvain iteration 1 …

## Current iteration is 1; current resolution is 1e-04; Modularity is 0.246722458537894; Number of clusters are 6
## Maximal modularity is 0.246722458537894; corresponding resolution is 1e-04
## 
## Run kNN based graph clustering DONE, totally takes 3.21154999732971 s.

-Number of clusters: 6

# mDAT <- clusterCells(mDAT, num_clusters = 4,
#                         method = "densityPeak",    
#                         verbose = F,  
#                         cores = parallel::detectCores())

plotDR(mDAT, "Cluster")

plotDR(mDAT, "dx")

plotDR(mDAT, "mut")

# CLUSTERING FROM SEURAT
# plotDR(mDAT, "post_clustering")

Identify Cell Types with Garnett

# generate size factors for normalization later
# Get pre-trained PBMC classifer 
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC  

mDAT <- garnett::classify_cells(mDAT, 
                           classifier = hsPBMC,
                           db = org.Hs.eg.db,
                           cluster_extend = TRUE,
                           cds_gene_id_type = "SYMBOL")
## Processing block 1/22 ... OK
## Processing block 2/22 ... OK
## Processing block 3/22 ... OK
## Processing block 4/22 ... OK
## Processing block 5/22 ... OK
## Processing block 6/22 ... OK
## Processing block 7/22 ... OK
## Processing block 8/22 ... OK
## Processing block 9/22 ... OK
## Processing block 10/22 ... OK
## Processing block 11/22 ... OK
## Processing block 12/22 ... OK
## Processing block 13/22 ... OK
## Processing block 14/22 ... OK
## Processing block 15/22 ... OK
## Processing block 16/22 ... OK
## Processing block 17/22 ... OK
## Processing block 18/22 ... OK
## Processing block 19/22 ... OK
## Processing block 20/22 ... OK
## Processing block 21/22 ... OK
## Processing block 22/22 ... OK
## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)
## 
##         B cells           CD34+     CD4 T cells     CD8 T cells 
##              13              92               3               1 
## Dendritic cells       Monocytes        NK cells         T cells 
##             282           15029             804              17 
##         Unknown 
##            2903
cell_summary <- table(pData(mDAT)$cluster_ext_type) 
cell_summary
## 
##         B cells           CD34+     CD4 T cells     CD8 T cells 
##              12              13               3               1 
## Dendritic cells       Monocytes        NK cells         T cells 
##             144           17435             686               9 
##         Unknown 
##             841
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
                                   node = "root",
                                   db = org.Hs.eg.db,
                                   convert_ids = F)
head(feature_genes)  
##                      B cells       CD34+ Dendritic cells     Monocytes
## (Intercept)     -0.884989922  0.47694427    -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859    -0.007978714  0.0056365451
## ENSG00000019582  0.030146634 -0.01959215     0.015232191  0.0001883507
## ENSG00000156738  2.193353690 -0.66191294    -1.104899464 -0.7393680112
## ENSG00000177455  2.056641938 -0.71851810    -1.941859710 -1.0448682467
## ENSG00000105369  1.800296145 -0.53835614     0.063380804 -0.1973728377
##                     NK cells      T cells     Unknown
## (Intercept)     -0.911657164  0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455  0.297279508  0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# If a cell type is called less than n times, re-categorize it as unknown
mDAT$cluster_ext_type_filt <- ifelse(mDAT$cluster_ext_type %in% names(cell_summary[cell_summary<50]), 
                                     "Unknown", mDAT$cluster_ext_type)

plot_cell_clusters(mDAT, color_by ="cell_type", cell_size = .8) +  facet_wrap(~dx)

plot_cell_clusters(mDAT, color_by ="cluster_ext_type", cell_size = .8)  

plot_cell_clusters(mDAT, color_by ="cluster_ext_type_filt", cell_size = .8) 

Gene Expression

Monocyte Markers

plot_cell_clusters(mDAT, markers = c("CD14","FCGR3A"), cell_size = 0.8)
## Warning: Using alpha for a discrete variable is not advised.

PD Genes

plot_cell_clusters(mDAT, markers = c("LRRK2","GBA"), cell_size = 0.8)
## Warning: Using alpha for a discrete variable is not advised.

Selected Genes

plot_cell_clusters(mDAT, markers = c("MS4A4A","MS4A6A"), cell_size = 0.8) 
## Warning: Using alpha for a discrete variable is not advised.

Heatmap

plot_markers_cluster(mDAT, markers = protDAT@var.genes[1:50], minimal_cluster_fraction = 0.05)

Pseudotime

# subsetCDS(mDAT, )
mDAT <- partitionCells(mDAT)
mDAT <- learnGraph(mDAT, RGE_method = 'SimplePPT',
                   n_threads =  parallel::detectCores())
## Warning: Deprecated, use tibble::rownames_to_column() instead.

## Warning: Deprecated, use tibble::rownames_to_column() instead.

## Warning: Deprecated, use tibble::rownames_to_column() instead.
plot_cell_trajectory(mDAT, color_by = "Cluster", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "dx", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "mut", cell_size = .01) 

plot_cell_trajectory(mDAT, color_by = "cluster_ext_type_filt", cell_size = .01) 

# dir.create(file.path(resultsPath, "pseudotime"), showWarnings = F)
# plot_3d_cell_trajectory(mDAT,
#                         color_by="cell_type",
#                         webGL_filename= file.path(resultsPath, "pseudotime/pseudotime_cellType.html"),
#                         show_backbone=TRUE,
#                         useNULL_GLdev=TRUE)
# plot_3d_cell_trajectory(mDAT, markers = c('FCGR3A'),
#                         webGL_filename=file.path(resultsPath, "pseudotime/pseudotime_FCGR3A.html"),
#                         show_backbone=TRUE,
#                         useNULL_GLdev=TRUE)

Differential Expression: Monocle

  • Tutorial
  • qval = FDR *“The first of the two models is called the full model. This model is essentially a way of predicting the expression value of each gene in a given cell knowing only whether that cell is a fibroblast or a myoblast. The second model, called the reduced model, does the same thing, but it doesn’t know the CellType for each cell. It has to come up with a reasonable prediction of the expression value for the gene that will be used for all the cells…The question Monocle must answer for each gene is how much better the full model’s prediction is than the reduced model’s.”
mDAT_sub <- mDAT[protDAT@var.genes[1:50], mDAT@phenoData$Cluster %in% c(1,2)]

# At ~1min/100 genes, this DGE method will take ~2.5hours to run for 14827 genes 
DEG_df <- differentialGeneTest(mDAT_sub, verbose = T,
                               fullModelFormulaStr = "~Cluster",
                               cores = parallel::detectCores())

# Plot
mDAT_sub_genes <- mDAT_sub[row.names(DEG_df)[1:4],]
plot_genes_jitter(mDAT_sub_genes, grouping = "Cluster", ncol= 4, color_by = "dx", plot_trend = T)

Convert back to Seurat

CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){ 
  # sum(colnames(mDAT) != colnames( mDAT@reducedDimS)) 
  ## Manually save reduced dimensions
  if(export_PC==T){
    mDAT$PC1 <- mDAT@normalized_data_projection[,1]
    mDAT$PC2 <- mDAT@normalized_data_projection[,2]
    mDAT$PC3 <- mDAT@normalized_data_projection[,3] 
  }
  if(export_UMAP==T){
    mDAT$UMAP1 <- mDAT@reducedDimA[1,]
    mDAT$UMAP2 <- mDAT@reducedDimA[2,]
    mDAT$UMAP3 <- mDAT@reducedDimA[3,]
  }
  DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
  DAT@scale.data <- DAT@data #Data was already scaled in Monocle
  # DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
  return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)
## Scaling data matrix
     head(DAT@meta.data) 
##                  nGene nUMI orig.ident singlet.or.not.binary       HTO
## GTCATTTTCCGCATCT  2035 6729  RAJ_13357                     1 NYUMD0011
## ATAAGAGAGGAGTTGC  1914 6718  RAJ_13357                     1   BID0076
## CATTATCCATGGTCTA  1971 6156  RAJ_13357                     1  MSMD0067
## CTTAGGAGTGGCGAAT  1893 6392  RAJ_13357                     1 NYUMD0011
## CATCAGAAGCACCGCT  1967 6110  RAJ_13357                     1  MSMD0067
## GCGGGTTAGTAGCCGA  1868 5977  RAJ_13357                     1 NYUMD0011
##                         ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT  MSMD0207   0.04191439     0     1       0       0
## ATAAGAGAGGAGTTGC  BIMD0076   0.04362066     0     1       0       0
## CATTATCCATGGTCTA NYUMD0015   0.03086921     0     1       0       0
## CTTAGGAGTGGCGAAT  MSMD0207   0.03613892     0     1       0       0
## CATCAGAAGCACCGCT NYUMD0015   0.04437531    19    10       8       0
## GCGGGTTAGTAGCCGA  MSMD0207   0.03514056     6     2       0       0
##                           barcode      dx     mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT      PD      PD     White      M  71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC      PD     GBA     White      M  47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control     White      M  51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT      PD      PD     White      M  71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control     White      M  51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA      PD      PD     White      M  71
##                  Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT    5.117674                 1       1               5
## ATAAGAGAGGAGTTGC    5.109379                 1       1               5
## CATTATCCATGGTCTA    4.661479                 1       1               1
## CTTAGGAGTGGCGAAT    4.844786                 1       1               1
## CATCAGAAGCACCGCT    4.610054                 1       1               7
## GCGGGTTAGTAGCCGA    4.508861                 1       1               7
##                  cell_type cluster_ext_type cluster_ext_type_filt      PC1
## GTCATTTTCCGCATCT Monocytes        Monocytes             Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes        Monocytes             Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes        Monocytes             Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes        Monocytes             Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes        Monocytes             Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes        Monocytes             Monocytes 17.77910
##                          PC2       PC3    UMAP1    UMAP2    UMAP3
## GTCATTTTCCGCATCT  0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC  1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT  4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA  2.61114660 -7.713996 1.303913 1.455874 1.130939

Biomarker Expression

markerList <- c("CD14", "FCGR3A")
markerData <- DAT@scale.data[row.names(DAT@scale.data) %in% markerList,] %>% t() %>%
  as.matrix() %>%  data.table(keep.rownames = T, key = "rn")
markerData[markerData$markerList[1]==0,] <- NA
markerData[markerData$markerList[2]==0,] <- NA


## Efficiently merge using data.table
# dt1 <- data.table(markerData, keep.rownames = "cell_type", key = "cell_type") %>% copy()
dt2 <- data.table(DAT@meta.data[c("cluster_ext_type_filt","Cluster")], keep.rownames = "Cell", key = "Cell") %>% copy()
row.names(dt2) <- dt2$Cell
markerDT <- markerData[dt2]

ggplot(data = markerDT, aes(x=CD14, y=FCGR3A) ) +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="purple", bins = 100, size=.1) +
  geom_point(aes(color=as.factor(cluster_ext_type_filt)), shape=16, stroke=0, size=2, alpha=.5) +
  guides(colour = guide_legend(title="cluster_ext_type_filt",override.aes = list(alpha = 1))) + 
    scale_color_manual(values = pretty_colors(mDAT, var = "cluster_ext_type_filt"))
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

tSNE FeaturePlots

Need to get tSNE from Monocle into Seurat somehow.

Monocyte Markers

FeaturePlot(DAT,features.plot =  c("CD14","FCGR3A"),  
            cols.use = c("grey","red","blue","purple"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

PD Genes

FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),  
            cols.use = c("grey","purple","cyan","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F) 

MS4A4A & MS4A6A

FeaturePlot(DAT, features.plot =  c("MS4A4A","MS4A6A"), 
            cols.use = c("grey","red","green","blue"),
            dark.theme = T, nCol = 2, overlay = T, no.legend = F)

Identify Cluster-specific Biomarkers

identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F){ 
  DAT <- SetIdent(DAT, ident.use = DAT@meta.data$Cluster) 
  if(allGenes==F){
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25, 
                                    only.pos = F, test.use = "wilcox")
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25, 
                                    only.pos = F, test.use = "wilcox") 
  }else{
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25, 
                                    only.pos = F, test.use = "wilcox",
                                    logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
                                    min.cells.gene = 1, min.diff.pct = -Inf)
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25, 
                                    only.pos = F, test.use = "wilcox",
                                    logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
                                    min.cells.gene = 1, min.diff.pct = -Inf)
  } 
  clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
    subset(p_val_adj<=0.05)
  clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
    subset(p_val_adj<=0.05)
  
  difference <- abs( length(row.names(clustA.uniqueMarkers)) - length(row.names(clustB.uniqueMarkers) ) )
  uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
                              Cluster1_markers=row.names(clustB.uniqueMarkers))
  write.csv(uniqueMarkers,
            file.path(resultsPath,"unique_cluster_markers.csv"), 
            quote = F, row.names = F)
  createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 0 and Cluster 1") 
  return(uniqueMarkers)
}

 
uniqueMarkers <- identify_unique_markers(clustDAT, clusterA = 1, clusterB = 2, allGenes=allGenes)

DGE Across Clusters

Disease

# clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control", 
               allGenes = F) 

Mutation

# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA", 
               allGenes = F)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

DGE Within Clusters

Disease

DGE_within_clusters(clustDAT,  meta_var = "dx", group1 = "PD", group2 = "control", clusterList = c(0,1), 
               allGenes = F)

Cluster 0: PD vs. control

## Error in WhichCells(object = object, ident = ident.use, ident.remove = ident.remove, : Identity : 0 not found.

Mutation

DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", clusterList = c(0,1), 
               allGenes = F)

Cluster 0: PD vs. GBA

## Error in WhichCells(object = object, ident = ident.use, ident.remove = ident.remove, : Identity : 0 not found.

DEG Enrichment w/ enrichR

enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
                 "GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018", 
                 "Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
 
geneList <- data.frame(Gene=row.names(DEGs_monocytes), 
     Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))

results <- enrichr(genes = geneList, databases = enrichr_dbs ) 

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

for (db in enrichr_dbs){
  cat('\n')
  cat("##",db,"\n")  
  # res <- subset(results[[db]], Adjusted.P.value<=0.05)
  createDT_html(results[[db]], paste("Enrichr Results:",db))
  cat('\n')
}  

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas